package cn.com.cfae.iras.doc.analyze.parse.tokenizer;

import cn.com.cfae.iras.doc.analyze.parse.model.ExtractBizModel;
import cn.com.cfae.iras.doc.analyze.parse.model.ExtractItem;
import com.hankcs.hanlp.corpus.document.sentence.Sentence;
import com.hankcs.hanlp.corpus.document.sentence.word.IWord;
import com.hankcs.hanlp.tokenizer.NLPTokenizer;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;

/**
 * Singleton tokenizer that segments a piece of text with HanLP's
 * {@code NLPTokenizer.ANALYZER} and maps the first token that is a known
 * synonym of the given {@link ExtractBizModel} to its {@link ExtractItem}.
 *
 * <p>The class holds no per-call state of its own; obtain the shared instance
 * through {@link #getInstance()}.
 */
public class YXZWTokenizer extends BaseTokenizer {

    private static final Logger logger = LoggerFactory.getLogger(YXZWTokenizer.class);

    /** The single shared instance, created when the class is initialized. */
    private static final YXZWTokenizer instance = new YXZWTokenizer();

    /** Private so that the only instance is the one exposed by {@link #getInstance()}. */
    private YXZWTokenizer() {
    }

    /**
     * Returns the shared tokenizer instance.
     *
     * @return the singleton {@code YXZWTokenizer}
     */
    public static YXZWTokenizer getInstance() {
        return instance;
    }

    /**
     * Segments {@code text} and returns the extract item associated with the
     * first token whose trimmed value is contained in the model's synonyms.
     *
     * @param extractBizModel model supplying the synonym collection and the
     *                        synonym-to-item lookup
     * @param text            text to segment
     * @return the matching item, or {@code null} when {@code extractBizModel}
     *         is {@code null}, {@code text} is {@code null} or empty, or no
     *         token resolves to an item
     */
    public ExtractItem splitWords(ExtractBizModel extractBizModel, String text) {
        if (extractBizModel == null || StringUtils.isEmpty(text)) {
            return null;
        }

        Sentence sentence = NLPTokenizer.ANALYZER.analyze(text);
        List<IWord> wordList = sentence.wordList;
        for (IWord word : wordList) {
            String value = word.getValue().trim();
            if (!extractBizModel.getSynonyms().contains(value)) {
                continue;
            }
            ExtractItem extractItem = extractBizModel.getExtractItemBySynonym(value);
            if (extractItem == null) {
                // The synonym is listed but the lookup produced nothing; dereferencing
                // it in the log call below would throw, so try the next token instead.
                logger.warn("同义词：{} 未匹配到对应的抽取项，已跳过。", value);
                continue;
            }
            logger.info("分析到相关有息债务信息，指标名称：{}，IteName：{}。", value, extractItem.getItemName());
            // Only the first resolvable synonym is reported.
            return extractItem;
        }
        return null;
    }
}
